Chapter 5 Community composition
5.1 Taxonomy overview
5.1.1 Stacked barplot
# Stacked barplot of phylum-level relative abundances, one bar per sample and
# one facet per individual.
genome_counts_filt %>%
  # Apply TSS normalisation: scale every sample column so that it sums to 1
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # Reduce to minimum number of columns (one row per genome-sample pair)
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  filter(count > 0) %>% # filter 0 counts
  # Order the facets Sg1..Sg10 instead of alphabetically. The ordering is
  # applied to `individual` (the facetting variable); applying these ten levels
  # to `sample` would turn every sample id that is not one of them into NA.
  mutate(individual = factor(individual, levels = c("Sg1", "Sg2", "Sg3", "Sg4", "Sg5", "Sg6", "Sg7", "Sg8", "Sg9", "Sg10"))) %>%
  # Grouping enables keeping the same sorting of taxonomic units
  ggplot(aes(x = sample, y = count, fill = phylum, group = phylum)) +
  geom_bar(stat = "identity", colour = "white", linewidth = 0.1) + # stacked bars with white borders
  scale_fill_manual(values = phylum_colors) +
  facet_nested(. ~ individual, scales = "free") + # facet per individual
  guides(fill = guide_legend(ncol = 1)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.title.x = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")
  ) +
  labs(fill = "Phylum", y = "Relative abundance", x = "Samples")
5.1.2 Phylum relative abundances
# Relative abundance of every phylum in every sample (long format:
# sample, phylum, relabun).
phylum_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, phylum) %>%
  # Return an ungrouped table so later steps do not inherit the per-sample
  # grouping (this also silences the "grouped output" message).
  summarise(relabun = sum(count), .groups = "drop")
# Phylum names ordered from highest to lowest mean relative abundance.
phylum_arrange <- phylum_summary %>%
  group_by(phylum) %>%
  summarise(mean = mean(relabun)) %>%
  arrange(desc(mean)) %>%
  pull(phylum)
# Print the mean relative abundance of each phylum across samples.
summarise(
  group_by(phylum_summary, phylum),
  mean = mean(relabun)
)
# A tibble: 10 × 2
phylum mean
<chr> <dbl>
1 p__Bacillota 0.0189
2 p__Bacillota_A 0.236
3 p__Bacillota_B 0.00355
4 p__Bacillota_C 0.00260
5 p__Bacteroidota 0.218
6 p__Campylobacterota 0.0995
7 p__Cyanobacteriota 0.000493
8 p__Desulfobacterota 0.0116
9 p__Pseudomonadota 0.404
10 p__Verrucomicrobiota 0.00600
# Jitter plot of per-sample relative abundances, one row per phylum, with the
# most abundant phylum (first element of phylum_arrange) at the top.
ggplot(
  data = phylum_summary %>%
    filter(phylum %in% phylum_arrange) %>%
    mutate(phylum = factor(phylum, levels = rev(phylum_arrange))),
  mapping = aes(x = relabun, y = phylum, group = phylum, color = phylum)
) +
  # scale_color_manual(values=c("#F4D9AE","#DE9E46","#2D8077","#CD4F41")) +
  # geom_boxplot() +
  geom_jitter(alpha = 0.5) +
  theme_minimal()
## Taxonomy boxplot
5.1.3 Family
# Relative abundance of every family in every sample (long format:
# sample, family, relabun).
family_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation
  # Reduce to minimum number of columns (one row per genome-sample pair)
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, family) %>%
  # Return an ungrouped table so later steps do not inherit the per-sample
  # grouping (this also silences the "grouped output" message).
  summarise(relabun = sum(count), .groups = "drop")
# Family names ordered from highest to lowest summed relative abundance.
family_arrange <- family_summary %>%
  group_by(family) %>%
  summarise(total = sum(relabun)) %>%
  arrange(desc(total)) %>%
  pull(family)
# Jitter plot of the 20 most abundant families, coloured by phylum and split
# by sample type.
# NOTE(review): the family -> phylum lookup below triggers dplyr's
# many-to-many warning, i.e. at least one family label is paired with more
# than one phylum, so rows for that family are duplicated by the join.
# Presumably this is an unclassified family label shared across phyla -
# confirm, and consider summarising by (sample, phylum, family) upstream.
family_summary %>%
  left_join(genome_metadata %>% select(family, phylum) %>% unique(), by = join_by(family)) %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  # Keep the top-20 families and drop absent (zero-abundance) observations
  filter(family %in% family_arrange[1:20], relabun > 0) %>%
  mutate(family = factor(family, levels = rev(family_arrange[1:20]))) %>%
  ggplot(aes(x = relabun, y = family, group = family, color = phylum)) +
  # geom_boxplot() +
  geom_jitter(alpha = 0.5) +
  scale_color_manual(values = phylum_colors[-8]) +
  facet_grid(. ~ type) +
  theme_minimal()
Warning in left_join(., genome_metadata %>% select(family, phylum) %>% unique(), : Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 7 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning.
5.1.4 Genus
# Relative abundance of every genus in every sample (long format:
# sample, genus, relabun), excluding the empty genus label "g__".
genus_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation
  # Reduce to minimum number of columns (one row per genome-sample pair)
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, genus) %>%
  # Return an ungrouped table so later steps do not inherit the per-sample
  # grouping (this also silences the "grouped output" message).
  summarise(relabun = sum(count), .groups = "drop") %>%
  filter(genus != "g__")
# Genus names (without the "g__" prefix) ordered from highest to lowest
# summed relative abundance; the empty genus label "g__" is excluded.
genus_arrange <- genus_summary %>%
  group_by(genus) %>%
  summarise(total = sum(relabun)) %>%
  filter(genus != "g__") %>%
  arrange(desc(total)) %>%
  pull(genus) %>%
  sub("^g__", "", .)
# Jitter plot of the 20 most abundant genera, coloured by phylum and split by
# sample type.
genus_summary %>%
  left_join(genome_metadata %>% select(genus, phylum) %>% unique(), by = join_by(genus)) %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  # Strip the "g__" prefix so labels match the entries of genus_arrange
  mutate(genus = sub("^g__", "", genus)) %>%
  # Keep the top-20 genera and drop absent (zero-abundance) observations
  filter(genus %in% genus_arrange[1:20], relabun > 0) %>%
  mutate(genus = factor(genus, levels = rev(genus_arrange[1:20]))) %>%
  ggplot(aes(x = relabun, y = genus, group = genus, color = phylum)) +
  # geom_boxplot() +
  geom_jitter(alpha = 0.5) +
  scale_color_manual(values = phylum_colors[-c(3, 4, 6, 8)]) +
  facet_grid(. ~ type) +
  theme_minimal()
5.2 Alpha diversity
# Calculate Hill numbers

# Input shared by all four metrics: genomes as row names, one column per
# sample, with all-zero sample columns removed.
hill_counts <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0)))

# Run hilldiv() on a count table and return the result as a two-column data
# frame: `sample` plus one column named after `metric`.
#   counts: count table passed as first argument to hilldiv()
#   metric: name given to the first column of the transposed hilldiv() output
#   ...   : further arguments forwarded to hilldiv() (q, tree, dist)
hill_metric <- function(counts, metric, ...) {
  counts %>%
    hilldiv(., ...) %>%
    t() %>%
    as.data.frame() %>%
    dplyr::rename(!!metric := 1) %>%
    rownames_to_column(var = "sample")
}

richness <- hill_metric(hill_counts, "richness", q = 0)

neutral <- hill_metric(hill_counts, "neutral", q = 1)

phylogenetic <- hill_metric(hill_counts, "phylogenetic", q = 1, tree = genome_tree)

# Aggregate basal GIFT into elements
dist <- genome_gifts %>%
  to.elements(., GIFT_db) %>%
  traits2dist(., method = "gower")

functional <- hill_metric(hill_counts, "functional", q = 1, dist = dist) %>%
  # NaN functional diversities are replaced with 1
  mutate(functional = if_else(is.nan(functional), 1, functional))
# Merge all metrics
alpha_div <- richness %>%
  full_join(neutral, by = join_by(sample)) %>%
  full_join(phylogenetic, by = join_by(sample)) %>%
  full_join(functional, by = join_by(sample))

# Boxplots of the four alpha diversity metrics by sample type
alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  mutate(metric = factor(metric, levels = c("richness", "neutral", "phylogenetic", "functional"))) %>%
  ggplot(aes(y = value, x = type, group = type, color = type, fill = type)) +
  geom_boxplot(outlier.shape = NA) +
  # Jitter horizontally only: the default also jitters vertically, which
  # would displace the plotted diversity values.
  geom_jitter(alpha = 0.5, height = 0) +
  scale_color_manual(
    name = "Sample type",
    breaks = c("cloaca", "feces"),
    labels = c("Cloaca", "Faeces"),
    values = c("#e5bd5b", "#6b7398")
  ) +
  scale_fill_manual(
    name = "Sample type",
    breaks = c("cloaca", "feces"),
    labels = c("Cloaca", "Faeces"),
    values = c("#e5bd5b50", "#6b739850")
  ) +
  facet_wrap(. ~ metric, scales = "free", ncol = 4) +
  coord_cartesian(xlim = c(1, NA)) +
  theme_classic() +
  theme(
    strip.background = element_blank(),
    # `linewidth` replaces the deprecated `size` argument for line elements
    panel.grid.minor.x = element_line(linewidth = .1, color = "grey"),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Mixed model: richness by sample type with a random intercept per
# individual, fitted by maximum likelihood (REML = FALSE)
alpha_div %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  lmerTest::lmer(richness ~ type + (1 | individual), data = ., REML = FALSE) %>%
  broom.mixed::tidy() %>%
  tt()
boundary (singular) fit: see help('isSingular')
| effect | group | term | estimate | std.error | statistic | df | p.value |
|---|---|---|---|---|---|---|---|
| fixed | NA | (Intercept) | 1.600000 | 2.987725 | 0.5355245 | 20 | 5.981925e-01 |
| fixed | NA | typefeces | 61.300000 | 4.225281 | 14.5079106 | 20 | 4.444567e-12 |
| ran_pars | individual | sd__(Intercept) | 0.000000 | NA | NA | NA | NA |
| ran_pars | Residual | sd__Observation | 9.448016 | NA | NA | NA | NA |
# Mixed model: neutral diversity by sample type with a random intercept per
# individual, fitted by maximum likelihood (REML = FALSE)
tt(broom.mixed::tidy(
  lmerTest::lmer(
    neutral ~ type + (1 | individual),
    data = left_join(alpha_div, sample_metadata, by = join_by(sample)),
    REML = FALSE
  )
))
boundary (singular) fit: see help('isSingular')
| effect | group | term | estimate | std.error | statistic | df | p.value |
|---|---|---|---|---|---|---|---|
| fixed | NA | (Intercept) | 1.300518e+00 | 1.359874 | 0.9563518 | 20 | 3.503127e-01 |
| fixed | NA | typefeces | 3.294989e+01 | 1.923152 | 17.1332730 | 20 | 2.024047e-13 |
| ran_pars | individual | sd__(Intercept) | 4.971903e-09 | NA | NA | NA | NA |
| ran_pars | Residual | sd__Observation | 4.300298e+00 | NA | NA | NA | NA |
# Mixed model: phylogenetic diversity by sample type with a random intercept
# per individual, fitted by maximum likelihood (REML = FALSE)
tt(broom.mixed::tidy(
  lmerTest::lmer(
    phylogenetic ~ type + (1 | individual),
    data = left_join(alpha_div, sample_metadata, by = join_by(sample)),
    REML = FALSE
  )
))
boundary (singular) fit: see help('isSingular')
| effect | group | term | estimate | std.error | statistic | df | p.value |
|---|---|---|---|---|---|---|---|
| fixed | NA | (Intercept) | 1.2384408 | 0.2335543 | 5.302583 | 20 | 3.441968e-05 |
| fixed | NA | typefeces | 3.1026737 | 0.3302956 | 9.393626 | 20 | 8.961863e-09 |
| ran_pars | individual | sd__(Intercept) | 0.0000000 | NA | NA | NA | NA |
| ran_pars | Residual | sd__Observation | 0.7385635 | NA | NA | NA | NA |
# Mixed model: functional diversity by sample type with a random intercept
# per individual, fitted by maximum likelihood (REML = FALSE)
tt(broom.mixed::tidy(
  lmerTest::lmer(
    functional ~ type + (1 | individual),
    data = left_join(alpha_div, sample_metadata, by = join_by(sample)),
    REML = FALSE
  )
))
boundary (singular) fit: see help('isSingular')
| effect | group | term | estimate | std.error | statistic | df | p.value |
|---|---|---|---|---|---|---|---|
| fixed | NA | (Intercept) | 1.052834e+00 | 0.03546038 | 29.690432 | 20 | 5.139978e-18 |
| fixed | NA | typefeces | 4.136317e-01 | 0.05014855 | 8.248128 | 20 | 7.248330e-08 |
| ran_pars | individual | sd__(Intercept) | 3.057788e-11 | NA | NA | NA | NA |
| ran_pars | Residual | sd__Observation | 1.121356e-01 | NA | NA | NA | NA |
5.3 Beta diversity
# Pairwise dissimilarities between samples from hillpair(); each call receives
# the count table with genomes as row names.

# q = 0, no tree or distance matrix
beta_q0n <- hillpair(column_to_rownames(genome_counts_filt, "genome"), q = 0)

# q = 1, no tree or distance matrix
beta_q1n <- hillpair(column_to_rownames(genome_counts_filt, "genome"), q = 1)

# q = 1, using the genome tree
beta_q1p <- hillpair(column_to_rownames(genome_counts_filt, "genome"), q = 1, tree = genome_tree)

# q = 1, using the trait distance matrix `dist`
beta_q1f <- hillpair(column_to_rownames(genome_counts_filt, "genome"), q = 1, dist = dist)
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 1 0.07339 0.073391 0.6442 999 0.413
Residuals 18 2.05080 0.113933
Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
cloaca feces
cloaca 0.433
feces 0.43268
# PERMANOVA of the q = 0 dissimilarities by sample type, with permutations
# restricted within individuals.
# Sample metadata is reordered once to match the labels of the matrix being
# tested (beta_q0n$C), so `data` and `strata` cannot drift apart.
meta_q0n <- sample_metadata %>%
  arrange(match(sample, labels(beta_q0n$C)))

adonis2(beta_q0n$C ~ type,
  data = meta_q0n,
  permutations = 999,
  strata = pull(meta_q0n, individual)
) %>%
  broom::tidy() %>%
  tt()
| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| type | 1 | 3.005780 | 0.4664694 | 15.73752 | 0.002 |
| Residual | 18 | 3.437902 | 0.5335306 | NA | NA |
| Total | 19 | 6.443682 | 1.0000000 | NA | NA |
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 1 0.01774 0.01774 0.1446 999 0.692
Residuals 18 2.20802 0.12267
Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
cloaca feces
cloaca 0.694
feces 0.70818
# PERMANOVA of the q = 1 dissimilarities by sample type, with permutations
# restricted within individuals. Sample metadata is reordered to match the
# labels of beta_q1n$C for both `data` and `strata`.
tt(broom::tidy(
  adonis2(
    beta_q1n$C ~ type,
    data = arrange(sample_metadata, match(sample, labels(beta_q1n$C))),
    permutations = 999,
    strata = pull(
      arrange(sample_metadata, match(sample, labels(beta_q1n$C))),
      individual
    )
  )
))
| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| type | 1 | 2.322537 | 0.362287 | 10.22586 | 0.002 |
| Residual | 18 | 4.088228 | 0.637713 | NA | NA |
| Total | 19 | 6.410765 | 1.000000 | NA | NA |
# Phylogenetic diversity
# Test homogeneity of multivariate dispersion between sample types. The
# grouping vector is reordered to match the labels of the distance matrix
# (as done for adonis2) so each sample is paired with its own type.
betadisper(
  beta_q1p$C,
  sample_metadata %>%
    arrange(match(sample, labels(beta_q1p$C))) %>%
    pull(type)
) %>%
  permutest(pairwise = TRUE)
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 1 0.07195 0.071945 1.0792 999 0.35
Residuals 18 1.20001 0.066667
Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
cloaca feces
cloaca 0.443
feces 0.31264
# PERMANOVA of the phylogenetic (q = 1) dissimilarities by sample type, with
# permutations restricted within individuals.
# Sample metadata is reordered once to match the labels of the matrix being
# tested (beta_q1p$C), so `data` and `strata` cannot drift apart.
meta_q1p <- sample_metadata %>%
  arrange(match(sample, labels(beta_q1p$C)))

adonis2(beta_q1p$C ~ type,
  data = meta_q1p,
  permutations = 999,
  strata = pull(meta_q1p, individual)
) %>%
  broom::tidy() %>%
  tt()
| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| type | 1 | 2.893508 | 0.6789794 | 38.07116 | 0.002 |
| Residual | 18 | 1.368047 | 0.3210206 | NA | NA |
| Total | 19 | 4.261555 | 1.0000000 | NA | NA |
# Functional diversity
# Test homogeneity of multivariate dispersion between sample types. The
# grouping vector is reordered to match the labels of the distance matrix
# (as done for adonis2) so each sample is paired with its own type.
betadisper(
  beta_q1f$C,
  sample_metadata %>%
    arrange(match(sample, labels(beta_q1f$C))) %>%
    pull(type)
) %>%
  permutest(pairwise = TRUE)
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 1 0.15953 0.159526 7.2962 999 0.01 **
Residuals 18 0.39355 0.021864
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
cloaca feces
cloaca 0.006
feces 0.014616
# PERMANOVA of the functional (q = 1) dissimilarities by sample type, with
# permutations restricted within individuals.
# Sample metadata is reordered once to match the labels of the matrix being
# tested (beta_q1f$C), so `data` and `strata` cannot drift apart.
meta_q1f <- sample_metadata %>%
  arrange(match(sample, labels(beta_q1f$C)))

adonis2(beta_q1f$C ~ type,
  data = meta_q1f,
  permutations = 999,
  strata = pull(meta_q1f, individual)
) %>%
  broom::tidy() %>%
  tt()
| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| type | 1 | 3.1347963 | 0.8009941 | 72.44958 | 0.002 |
| Residual | 18 | 0.7788359 | 0.1990059 | NA | NA |
| Total | 19 | 3.9136321 | 1.0000000 | NA | NA |
5.3.1 Neutral diversity plot
# NMDS ordination of the q = 0 dissimilarities: points coloured by sample
# type, shaped by individual, and connected to their sample-type centroid.
# NOTE(review): this ordination uses beta_q0n$S while the PERMANOVA on the
# same object uses beta_q0n$C - confirm which component is intended.
beta_q0n$S %>%
  vegan::metaMDS(., trymax = 500, k = 2, verbosity = FALSE, trace = FALSE) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(sample_metadata, by = join_by(sample)) %>%
  # Centroid of each sample type, used as the origin of the segments
  group_by(type) %>%
  mutate(
    x_cen = mean(NMDS1, na.rm = TRUE),
    y_cen = mean(NMDS2, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  # Order individuals Sg1..Sg10 rather than alphabetically
  mutate(individual = factor(individual, levels = c("Sg1", "Sg2", "Sg3", "Sg4", "Sg5", "Sg6", "Sg7", "Sg8", "Sg9", "Sg10"))) %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = type, shape = individual)) +
  scale_color_manual(
    name = "Sample type",
    breaks = c("cloaca", "feces"),
    labels = c("Cloaca", "Faeces"),
    values = c("#e5bd5b", "#6b7398")
  ) +
  scale_shape_manual(values = 1:10) +
  geom_point(size = 4) +
  #   stat_ellipse(aes(color = beta_q1n_nmds$Groups))+
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9) +
  theme_classic() +
  theme(
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    axis.title = element_text(size = 20, face = "bold"),
    axis.text = element_text(face = "bold", size = 18),
    panel.background = element_blank(),
    # `linewidth` replaces the deprecated `size` argument for line elements
    axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
    legend.text = element_text(size = 16),
    legend.title = element_text(size = 18),
    legend.position = "right", legend.box = "vertical"
  ) +
  labs(shape = "Individual")
5.3.2 Functional diversity plot
# NMDS ordination of the functional (q = 1) dissimilarities: points coloured
# by sample type, shaped by individual, and connected to their sample-type
# centroid.
beta_q1f$C %>%
  vegan::metaMDS(., trymax = 500, k = 2, verbosity = FALSE, trace = FALSE) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(sample_metadata, by = join_by(sample)) %>%
  # Centroid of each sample type, used as the origin of the segments
  group_by(type) %>%
  mutate(
    x_cen = mean(NMDS1, na.rm = TRUE),
    y_cen = mean(NMDS2, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  # Order individuals Sg1..Sg10 rather than alphabetically
  mutate(individual = factor(individual, levels = c("Sg1", "Sg2", "Sg3", "Sg4", "Sg5", "Sg6", "Sg7", "Sg8", "Sg9", "Sg10"))) %>%
  # Map shape to individual so that the factor above and the shape scale
  # below take effect, matching the neutral diversity plot.
  ggplot(aes(x = NMDS1, y = NMDS2, color = type, shape = individual)) +
  scale_color_manual(
    name = "Sample type",
    breaks = c("cloaca", "feces"),
    labels = c("Cloaca", "Faeces"),
    values = c("#e5bd5b", "#6b7398")
  ) +
  scale_shape_manual(values = 1:10) +
  geom_point(size = 4) +
  #   stat_ellipse(aes(color = beta_q1n_nmds$Groups))+
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9) +
  theme_classic() +
  theme(
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    axis.title = element_text(size = 20, face = "bold"),
    axis.text = element_text(face = "bold", size = 18),
    panel.background = element_blank(),
    # `linewidth` replaces the deprecated `size` argument for line elements
    axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
    legend.text = element_text(size = 16),
    legend.title = element_text(size = 18),
    legend.position = "right", legend.box = "vertical"
  ) +
  labs(shape = "Individual")